import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# NOTE: the next two lines were IPython/Jupyter-only commands. "%matplotlib inline"
# is a notebook magic (SyntaxError in plain Python) and "pwd" is a shell alias
# (NameError in plain Python), so both are kept only as comments here.
# %matplotlib inline
# pwd  # use os.getcwd() instead if the working directory is ever needed
# Load the Wisconsin Breast Cancer dataset from the working directory.
df = pd.read_csv("wbcd.csv")

# Quick exploratory look. Wrapped in print() because bare expressions only
# display output inside a notebook, not in a plain script.
print(df.head())
print(df.keys())
print(df.describe())  # fixed: original had a garbled "describecribecribe" typo (AttributeError)

# Pairwise feature relationships, colored by the diagnosis label.
sns.pairplot(df, hue='diagnosis', palette='Set1')

# Feature matrix / target vector: 'diagnosis' is the label column.
X = df.drop('diagnosis', axis=1)
y = df['diagnosis']
from sklearn.model_selection import train_test_split

# Hold out 30% of the data as a test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30)

from sklearn.ensemble import BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier
# (removed "BaggingClassifier?" — IPython help syntax, invalid in plain Python;
#  see the scikit-learn docs for the parameter reference instead)

# Base learner for the bagging ensemble: 3-nearest-neighbors.
m = KNeighborsClassifier(n_neighbors=3)

# Bagging ensemble: each member is trained on a random 50% of the samples
# and 2 of the features. oob_score=True gives a built-in validation estimate
# from the out-of-bag samples each member never saw.
bag = BaggingClassifier(
    m,
    max_samples=0.5,
    max_features=2,
    n_jobs=2,
    oob_score=True)
bag.fit(X_train, y_train)

# Evaluate on the held-out test set.
pred = bag.predict(X_test)
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(y_test, pred))
print(classification_report(y_test, pred))

print(bag.oob_score_)  # out-of-bag estimate of generalization accuracy
# fixed: was bag.score(X, y) — scoring on data that includes the training
# split inflates accuracy; only the held-out test split is a fair measure.
print(bag.score(X_test, y_test))
from sklearn.tree import DecisionTreeClassifier
# (removed "DecisionTreeClassifier?" — IPython help syntax, invalid in plain Python)

Ds = DecisionTreeClassifier()
# fixed: original fit on the FULL (X, y), so the test rows leaked into
# training and the metrics below were meaningless (a tree memorizes its
# training data). Train on the training split only.
Ds.fit(X_train, y_train)
pred = Ds.predict(X_test)
print(confusion_matrix(y_test, pred))
print(classification_report(y_test, pred))
# fixed: score on the held-out test set, not on data the tree was trained on
# (Ds.score(X, y) on a fully-grown tree is trivially ~1.0).
print(Ds.score(X_test, y_test))
from sklearn.ensemble import RandomForestClassifier
# (removed "RandomForestClassifier?" — IPython help syntax, invalid in plain Python)

# 20-tree forest; oob_score=True gives a free validation estimate from
# the bootstrap samples each tree never saw.
Rm = RandomForestClassifier(n_estimators=20, oob_score=True)
# fixed: original fit on the FULL (X, y) — the test rows leaked into
# training, so the confusion matrix below measured nothing. Fit on the
# training split only.
Rm.fit(X_train, y_train)
pred = Rm.predict(X_test)
print(confusion_matrix(y_test, pred))
print(classification_report(y_test, pred))
# fixed: evaluate on the held-out test split instead of (X, y).
print(Rm.score(X_test, y_test))
from sklearn.ensemble import AdaBoostClassifier
# (removed "AdaBoostClassifier?" — IPython help syntax, invalid in plain Python)

# fixed: dropped base_estimator=None — that keyword was deprecated in
# scikit-learn 1.2 (renamed to `estimator`) and removed in 1.4; None is
# the default anyway (a depth-1 decision stump).
Ad = AdaBoostClassifier(n_estimators=100)
# fixed: train on the training split only — original fit on the full (X, y),
# leaking the test rows into training.
Ad.fit(X_train, y_train)
pred = Ad.predict(X_test)
print(confusion_matrix(y_test, pred))
print(classification_report(y_test, pred))
# fixed: evaluate on the held-out test split instead of (X, y).
print(Ad.score(X_test, y_test))
from sklearn.ensemble import GradientBoostingClassifier
# (removed "GradientBoostingClassifier?" — IPython help syntax, invalid in plain Python)

# Small boosted ensemble: 10 sequential trees.
Gb = GradientBoostingClassifier(n_estimators=10)
# fixed: train on the training split only — original fit on the full (X, y),
# leaking the test rows into training.
Gb.fit(X_train, y_train)
pred = Gb.predict(X_test)
print(confusion_matrix(y_test, pred))
print(classification_report(y_test, pred))
# fixed: single evaluation on the held-out test split (original called
# Gb.score(X, y) twice, both times on data that included the training set).
print(Gb.score(X_test, y_test))
from sklearn.ensemble import VotingClassifier
# (removed "VotingClassifier?" — IPython help syntax, invalid in plain Python)
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier

# Hard-voting ensemble: each member casts one vote, majority label wins.
# max_iter raised on LogisticRegression — the default (100) commonly fails
# to converge on this unscaled dataset and spams ConvergenceWarning.
Vm = VotingClassifier(
    estimators=[('lr', LogisticRegression(max_iter=1000)),
                ('rf', RandomForestClassifier()),
                ('gnb', GaussianNB())], voting='hard')
# fixed: train on the training split only — original fit on the full (X, y),
# leaking the test rows into training.
Vm.fit(X_train, y_train)
# fixed: evaluate on the held-out test split instead of (X, y).
print(Vm.score(X_test, y_test))